#!/usr/bin/python
# -*- coding: utf-8 -*-

'''Experiments with the L3S wiki backup: per-project daily GET request counts from an access log.'''

from collections import defaultdict
import datetime

# Cap the VM heap before any work starts so that a runaway run cannot
# crash the server: soft limit 20 GiB, hard limit 22 GiB (in bytes).
import utils
utils.alloc(20 * 1024 ** 3, 22 * 1024 ** 3)

def project_timeseries(logfile):
    '''Append per-project daily GET request counts to ``<project>.csv`` files.

    Every line of ``logfile`` is scanned for

    * a timestamp: the text between the first ``[`` and the next space,
      parsed as ``dd/Mon/YYYY:HH:MM:SS``;
    * a request: the first occurrence of ``"GET``.  The project name is
      the text that starts five characters after that quote and ends
      before the next ``/``.

    Lines without such a timestamp, without a ``"GET`` request, or without
    a ``/`` after the request are skipped.  Project candidates that contain
    a space or ``HTTP`` (the request path had no ``/`` of its own, so the
    slice ran into the protocol token) are skipped as well.

    For each project the file ``<project>.csv`` (relative to the current
    working directory) is opened once in append mode and receives one
    tab-separated ``YYYY-MM-DD<TAB>count`` line per day, in ascending date
    order.  Projects are processed in sorted order.

    Args:
        logfile: path of the log file to read.

    Raises:
        ValueError: a bracketed timestamp is present but does not match
            ``%d/%b/%Y:%H:%M:%S`` (``%b`` follows the current locale).
        IOError / OSError: ``logfile`` cannot be read or an output file
            cannot be opened for appending.
    '''
    # project name -> day ('YYYY-MM-DD') -> number of GET requests
    counts = defaultdict(lambda: defaultdict(int))

    with open(logfile, 'r') as f:
        for line in f:
            # Timestamp sits between the first '[' and the following space.
            i = line.find('[')
            j = line.find(' ', i) if i != -1 else -1
            if j == -1:
                # Blank or unstamped line: nothing to attribute to a day.
                continue
            # A malformed timestamp still raises, so that a wrong log
            # format or locale is not silently turned into empty output.
            dt = datetime.datetime.strptime(line[i+1:j], '%d/%b/%Y:%H:%M:%S')
            day = dt.strftime('%Y-%m-%d')

            i = line.find('"GET')
            if i == -1:
                # Not a GET request; without this check the slice below
                # would be taken relative to offset -1 and yield garbage.
                continue
            j = line.find('/', i + 1)
            if j == -1:
                # No '/' after the request: there is no project component.
                continue
            # NOTE(review): a request path that itself starts with '/'
            # gives an empty project name here (output file '.csv');
            # presumably the log stores paths without a leading slash --
            # confirm against real log lines.
            proj = line[i+5:j]
            if 'HTTP' not in proj and ' ' not in proj:
                counts[proj][day] += 1

    # One open() per project instead of one per (project, day) row; the
    # resulting line order equals sorting the (project, day) pairs.
    for proj in sorted(counts):
        per_day = counts[proj]
        with open(proj + '.csv', 'a') as o:
            o.writelines('%s\t%d\n' % (day, per_day[day])
                         for day in sorted(per_day))

import sys

# Script entry point: the first command-line argument is the log file to
# analyse.  Guarded so that importing this module (e.g. to reuse
# project_timeseries) does not start a run with the importer's argv.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('usage: %s LOGFILE' % sys.argv[0])
    project_timeseries(sys.argv[1])
